import scrapy
import sys
import os
pwd = os.getcwd()
sys.path.append(os.path.dirname(pwd))
from tutorial.items import *
from scrapy.http import Request
import json


class TVSpider(scrapy.Spider):
    """Spider that crawls the v.qq.com TV listing pages.

    Yields one ``TVItem`` per show entry (title, detail-page URL, actor
    list) and follows the "next page" link until no pagination link
    remains.
    """

    name = "TV"
    allowed_domains = ["v.qq.com"]
    start_urls = [
        "http://v.qq.com/x/list/tv",
    ]
    # Base used to absolutize the relative "next page" href in parse().
    url = "http://v.qq.com/x/list/tv"

    def parse(self, response):
        """Extract show entries from one listing page and queue the next.

        Args:
            response: Scrapy ``Response`` for a listing page.

        Yields:
            ``TVItem`` for every ``<li>`` that carries a title anchor,
            then a ``Request`` for the next listing page when a
            pagination link is present.
        """
        for sel in response.xpath('//ul/li'):
            item = TVItem()
            item['title'] = sel.xpath('div/strong/a/text()').extract()
            item['url'] = sel.xpath('div/strong/a/@href').extract()
            item['actor'] = sel.xpath('div/a/text()').extract()
            # <li> elements without a title anchor are layout rows, not
            # show entries — skip them.
            if item['title']:
                item['title'] = item['title'][0]
                # Guard the href lookup: the original indexed
                # item['url'][0] unconditionally, which raised
                # IndexError for a row that has a title but no href.
                item['url'] = item['url'][0] if item['url'] else ''
                yield item

        nextlink = response.xpath(
            '//div/a[@class="page_next"]/@href').extract()
        if nextlink:
            # The pagination href is relative; prepend the listing base.
            yield Request(self.url + nextlink[0], callback=self.parse)